# Author: Gregory Smith
# Date: 12 - 3 - 2018
# Title: Secret But Constrained Appendix A
# Job: This File Includes all of the Robustness Checks Included in the Final
  # Appendix that do not rely upon bootstrapped clustered SEs 

#---- Packages 

list.of.packages <- c("tidyverse", "scales", "texreg", "corrplot",
                      "MASS", "brglm", "stargazer")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

#--- Plots and Data Manipulation 

library(tidyverse)
library(scales)

#---- Texreg, Model Output, Interaction Plots 

library(texreg)
library(stargazer)
library(corrplot)
library(MASS)
library(brglm) # PML Logit models 


# Change Working Directory to Replicate Code 
setwd("~/Dropbox/covert_ops_paper/2019-smith-io-secret-but-constrained-replication-files/")

#---- Data 

dta <- read_csv("data/smith_2019_secret_but_constrained_dta.csv")
us_domestic_data <- read_csv("data/final_us_domestic_year.csv")

#---- Adds Divided Government to the US Domestic Data  

us_domestic_data <- us_domestic_data %>%
  mutate(divided = if_else(unified == 1, 0, 1)) %>% 
  dplyr::select(-X1)

#---------- Histograms 

theme_set(theme_bw()) 

ggplot(dta, aes(as.factor(divided))) +
  stat_count(color = "gray", width = 0.5) + 
  labs(x = "Divided Government", y = "Frequency")

# ggsave("divided_hist.pdf", width = 20, height = 20, units = "cm")


ggplot(dta, aes(prezmajority)) +
  geom_histogram(color = "gray") + 
  labs(x = "The Majority of the President's Party", y = "Frequency") 

# ggsave("prezmajority_hist.pdf", width = 20, height = 20, units = "cm")


ggplot(dta, aes(lppc)) +
  geom_histogram(color = "gray") + 
  labs(x = "LPPC Scores", y = "Frequency") 

# ggsave("lppc_hist.pdf", width = 20, height = 20, units = "cm")

ggplot(dta, aes(as.factor(election))) +
  stat_count(color = "gray", width = 0.5) + 
  labs(x = "Election Year", y = "Frequency") 
#       title = "Frequency Plot of Election Year")

# ggsave("election_hist.pdf", width = 20, height = 20, units = "cm")


ggplot(dta, aes(approval_q1)) +
  geom_histogram(color = "gray") + 
  labs(x = "Quarter 1 Presidential Approval", y = "Frequency") 
#       title = "Histogram of Quarter 1 Presidential Approval")

# ggsave("approval_hist.pdf", width = 20, height = 20, units = "cm")


ggplot(dta, aes(as.factor(democracy))) +
  stat_count(color = "gray", width = 0.5) + 
  labs(x = "Joint Democracy", y = "Frequency")
#       title = "Frequency Plot of Joint Democracy")

# ggsave("democracy_hist.pdf", width = 20, height = 20, units = "cm")


ggplot(dta, aes(cinc_proportion)) +
  geom_histogram(color = "gray") + 
  labs(x = "CINC Proportion", y = "Frequency")
#       title = "Histogram of CINC Proportion")

# ggsave("cinc_hist.pdf", width = 20, height = 20, units = "cm")


ggplot(dta, aes(as.factor(clean_russ_inf))) +
  stat_count(color = "gray", width = 0.5) + 
  labs(x = "Soviet Influence", y = "Frequency")
#       title = "Frequency Plot of Soviet Covert Influence")

#ggsave("russia_hist.pdf", width = 20, height = 20, units = "cm")


ggplot(dta, aes(as.factor(oversight_change))) +
  stat_count(color = "gray", width = 0.5) + 
  labs(x = "Oversight Change", y = "Frequency")
#       title = "Frequency Plot of Oversight Change")

# ggsave("oversight_hist.pdf", width = 20, height = 20, units = "cm")


#------ Summary Statistics of Key Variables 

# "Party Majority", "LPPC Score",

key_var <- dta %>% 
  ungroup() %>% 
  dplyr::select(divided, election, approval_q1 ,democracy,
                cinc_proportion, clean_russ_inf, oversight_change, orourke_onset, 
                us_covert_onset, minor_onset, major_onset) 


names(key_var) <- c("Divided Government", "Election Year", "Qtr. 1 Approval", 
                    "Joint Democracy", "CINC Ratio",
                    "Active Russian Operation", "Oversight Change",
                    "O'Rourke Onset", "Pooled Covert Onset", "Minor MID",
                    "Major MID")

# Note: Does not seem to have a native argument for `caption' 
stargazer(data.frame(key_var), nobs = FALSE, summary = TRUE,
          omit.summary.stat = c("p25", "p75"),
          label = "summary_stats", colnames = TRUE, no.space = FALSE)

#------- Correlation Plot 
# devtools::install_github("vsimko/corrplot")

cor_data <- dta %>% 
  dplyr::select(divided, prezmajority, lppc, election, approval_q1, democracy,
                cinc_proportion, clean_russ_inf, oversight_change) %>% 
  replace_na(replace = list(cinc_proportion = 0))

names(cor_data) <- c("Divided", "Pres. Majority", "LPPC", "Election", "Approval",
                     "Democracy", "CINC",  "Russian Op.", "Oversight")


cor_matrix <- cor(cor_data)
res1 <- cor.mtest(cor_data)


col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))

corrplot(cor_matrix, method = "color", col = col(200),
         type = "upper", order = "hclust", number.cex = .7,
         addCoef.col = "black", # Add coefficient of correlation
         tl.col = "black", tl.srt = 90, # Text label color and rotation
         # Combine with significance
         p.mat = res1$p, sig.level = 0.05, insig = "blank", 
         # hide correlation coefficient on the principal diagonal
         diag = FALSE)

# Saves and Prints Output
# dev.off() # necessary to print the pdf output 
# png('corrplot.png', units = "in", width = 7, height = 7, res = 300) 



#------ Creates Cross Tabulations of the Dependent Variables 
# Note: These cross-tabs demonstrate that the results aren't being driven by
#  simultaneous operations 

xtabs(~ orourke_onset + major_us_onset, data = dta)
xtabs(~ orourke_onset + minor_us_onset, data = dta)
xtabs(~ us_covert_onset + major_us_onset, data = dta)
xtabs(~ us_covert_onset + minor_us_onset, data = dta)

#---- Negative Binomial Model Robustness Checks  

#---- Number of Operations Active and Initiated Per Year
year_counts <- dta %>%
  dplyr::group_by(year) %>% 
  summarise(orourke_ops_active = sum(orourke_covert_intervention, na.rm = TRUE),
            orourke_ops_initiated = sum(orourke_onset, na.rm = TRUE),
            pooled_ops_active = sum(us_covert_operation, na.rm = TRUE),
            pooled_ops_initiated = sum(us_covert_onset, na.rm = TRUE),
            major_gml_initiated = sum(major_us_onset, na.rm = TRUE),
            minor_gml_initiated = sum(minor_us_onset, na.rm = TRUE),
            major_force_initiated = sum(major_force_onset, na.rm = TRUE),
            minor_force_initiated = sum(minor_force_onset, na.rm = TRUE)) %>% 
  mutate(oversight_change = if_else(year < 1975, 0, 1))


covert_frequency <- left_join(year_counts, us_domestic_data, by = "year") %>%
  mutate(
    president = case_when(
      truman == 1      ~ "truman",
      eisen  == 1      ~ "eisenhower",
      kennedy == 1     ~ "kennedy",
      johnson == 1     ~ "johnson",
      nixon == 1      ~ "nixon",
      ford == 1        ~ "ford",
      carter == 1      ~ "carter",
      reagan == 1      ~ "reagan",
      bush   == 1      ~ "bush",
      TRUE             ~ "other"
    )
  )

#------ Negative Binomial Models 

#---- Histogram of the Count Distributions 

ggplot(covert_frequency, aes(orourke_ops_initiated)) +
  geom_histogram(color = "gray") + 
  labs(x = "Count of Operations Initiated",
       y = "Frequency")
#title = "Histogram of Covert Operations Initated Per Year: O'Rourke Sample") 

#ggsave("orke_ops_frequency.pdf", width = 20, height = 20, units = "cm")

ggplot(covert_frequency, aes(pooled_ops_initiated)) +
  geom_histogram(color = "gray") + 
  labs(x = "Count of Operations Initiated",
       y = "Frequency")
#title = "Histogram of Covert Operations Initated Per Year: Pooled Sample")

# ggsave("pool_ops_frequency.pdf", width = 20, height = 20, units = "cm")


ggplot(covert_frequency, aes(major_gml_initiated)) +
  geom_histogram(color = "gray") + 
  labs(x = "Count of MIDs Initiated",
       y = "Frequency") 
# title = "Histogram of Major MIDs Initiated Per Year")

# ggsave("major_mids_frequency.pdf", width = 20, height = 20, units = "cm")


ggplot(covert_frequency, aes(minor_gml_initiated)) +
  geom_histogram(color = "gray") + 
  labs(x = "Count of MIDs Initiated",
       y = "Frequency") 
# title = "Histogram of Minor MIDs Initiated Per Year")

# ggsave("minor_mids_frequency.pdf", width = 20, height = 20, units = "cm")



#------ Negative Binomial Models 

#----- We know that the mean != the variance (suggests NB models)
#----- See additional likelihood ratio test below 

var(covert_frequency$orourke_ops_initiated)  # 5.442968
mean(covert_frequency$orourke_ops_initiated) # 1.44186

#----- Chi-Squared Test Function 
# Note: if p < 0.05, model fit is not good and we should use a NB model 

chi_squared_test <- function(model_name) {
  
  1 - pchisq(summary(model_name)$deviance, 
             summary(model_name)$df.residual)
}


#---- Negative Binomial Models 

or_nb.1 <- glm.nb(orourke_ops_initiated ~ divided, data = covert_frequency)

or_nb.2 <- glm.nb(orourke_ops_initiated ~ divided + election  + approval_q1 +
                    oversight_change,
                  data = covert_frequency)

#-- Full + Interaction Term 
or_nb.3 <- glm.nb(orourke_ops_initiated ~ divided + election  + approval_q1 +
                    oversight_change + divided*oversight_change,
                  data = covert_frequency)

#---- Pooled Sample: Negative Binomial Models 

p_nb.1 <- glm.nb(pooled_ops_initiated ~ divided, data = covert_frequency)

p_nb.2 <- glm.nb(pooled_ops_initiated ~ divided + election + approval_q1 +
                   oversight_change,
                 data = covert_frequency)

p_nb.3 <- glm.nb(pooled_ops_initiated ~ divided + election + approval_q1 +
                   oversight_change + divided*oversight_change,
                 data = covert_frequency)

#------- Texreg Tables of Count Models  

#----- Covert Models

texreg(list(or_nb.1, or_nb.2, or_nb.3, p_nb.1, p_nb.2, p_nb.3),
        #  ci.force =  FALSE, ci.test = NULL, ci.force.level = 0.95, bold = 0.05, 
          stars = c(0.10, 0.05, 0.01),
          #float.pos = "h", dcolumn = FALSE, booktabs = TRUE, 
          use.packages = FALSE, single.row = FALSE, inline.css = TRUE, center = FALSE, 
          no.margin = FALSE,
          custom.coef.map = list("divided" = "Divided Government",
                                 "election" = "Election Year",
                                 "approval_q1" = "Qtr. 1 Approval",
                                 "oversight_change" = "Oversight Change",
                                 "divided:oversight_change" = "Oversight Interaction",
                                 "(Intercept)" = "Intercept"),
          caption = "Negative Binomial Models of Covert Onset",
          label = "table:covert_nb",
          custom.model.names = c("O'Rourke Base",  "O'Rourke Full", "O'Rourke Int.",
                                 "Pooled Base", "Pooled Full", "Pooled Int."),
          sideways = TRUE)


#----- Minor MIDs

min_mid.1 <- glm.nb(minor_gml_initiated ~ divided, data = covert_frequency)

min_mid.2 <- glm.nb(minor_gml_initiated ~ divided + election + approval_q1 +
                      oversight_change,
                    data = covert_frequency)

#---- Major MIDs

maj_mid.1 <- glm.nb(major_gml_initiated ~ divided, data = covert_frequency)

maj_mid.2 <- glm.nb(major_gml_initiated ~ divided + election + approval_q1 +
                      oversight_change, data = covert_frequency)

#----- Texreg Table for Minor and Major MIDs

texreg(list(min_mid.1, min_mid.2,
            maj_mid.1, maj_mid.2),
       stars = c(0.10, 0.05, 0.01),
       float.pos = "h", dcolumn = FALSE, booktabs = TRUE, 
       use.packages = FALSE, single.row = FALSE, inline.css = TRUE, center = FALSE, 
       no.margin = FALSE,
       custom.coef.map = list("divided" = "Divided Government",
                              "election" = "Election Year",
                              "approval_q1" = "Qtr. 1 Approval",
                              "oversight_change" = "Oversight Change",
                              "(Intercept)" = "Intercept"),
       caption = "Negative Binomial Models of Major and Minor MID Onset",
       label = "table:mid_nb",
       custom.model.names = c("Minor MID Base", "Minor MID Full",
                              "Major MID Base", "Major MID Full"),
       sideways = TRUE)

#----- O'Rourke PML Logits 

pmle_1 <- brglm(orourke_onset ~  divided + factor(president) - 1 +
                  orourke_peace_years + orourke_peace_years_2  +
                  orourke_peace_years_3, data = dta, family = "binomial")

pmle_2 <- brglm(orourke_onset ~  divided + 
                  election  + approval_q1 +
                  democracy + cinc_proportion + clean_russ_inf + 
                  oversight_change + 
                  orourke_peace_years + orourke_peace_years_2  +
                  orourke_peace_years_3, data = dta, family = "binomial")

pmle_3 <- brglm(orourke_onset ~ divided + election  + approval_q1 +
                  democracy + cinc_proportion + clean_russ_inf +
                  oversight_change + divided*oversight_change + 
                  orourke_peace_years + orourke_peace_years_2  +
                  orourke_peace_years_3,
                data = dta, family = "binomial")


#------ Pooled PML Logits  

pmle_4 <- brglm(us_covert_onset ~  divided + factor(president) - 1 +
                  us_peace_years + us_peace_years_2 + us_peace_years_3,
                data = dta, family = "binomial")

pmle_5 <- brglm(us_covert_onset ~  divided + election  + approval_q1 +
                  democracy + cinc_proportion + clean_russ_inf +
                  oversight_change +  
                  us_peace_years + us_peace_years_2 + us_peace_years_3,
                data = dta, family = "binomial") 

pmle_6 <- brglm(us_covert_onset ~  divided + election  + approval_q1 +
                  democracy + cinc_proportion + clean_russ_inf +
                  oversight_change + divided*oversight_change + 
                  us_peace_years + us_peace_years_2 + us_peace_years_3,
                data = dta, family = "binomial")

#------ PML Models Using the O'Rourke and Pooled Samples 

texreg(list(pmle_1, pmle_2, pmle_3,
            pmle_4, pmle_5, pmle_6),
       stars = c(0.10, 0.05, 0.01),
       float.pos = "h", dcolumn = FALSE, booktabs = TRUE, 
       use.packages = FALSE, single.row = FALSE, inline.css = TRUE, center = FALSE, 
       no.margin = FALSE,
       custom.coef.map = list("divided" = "Divided Government",
                              "approval_q1" = "Qtr. 1 Approval",
                              "election" = "Election Year",
                              "oversight_change" = "Oversight Change",
                              "democracy" = "Joint Democracy",
                              "cinc_proportion" = "CINC Ratio",
                              "clean_russ_inf" = "Active Russian Operation",
                              "divided:oversight_change" = "Oversight Interaction"),
       # "(Intercept)" = "Intercept"),
       caption = "PML Logit Estimates: The Determinants of US Covert Operations",
       custom.model.names = c("O'Rourke 1",  "O'Rourke 2",  "O'Rourke 3",
                              "Pooled 1",  "Pooled 2", "Pooled 3"),
       custom.note = "$^{***}p<0.01$, $^{**}p<0.05$, $^*p<0.1$. Models O'Rourke 1 and Pooled 1 contain presidential fixed effects. Peace year terms have been suppressed.",
       sideways = FALSE
)
